import json, gzip, csv
from collections import Counter

# Build a tab-separated copy of the weight vectors in which every term that a
# book never uses (term frequency 0) has its weight forced to 0.
#
# How WeightVectors.json.gz is consumed here:
#   line 1  - JSON object; its sorted keys are taken as the book ids
#   line 2  - JSON collection of terms; sorted, it becomes the CSV header row
#   line 3+ - one JSON weight vector per line, the i-th one paired with BOOKS[i]
# NOTE(review): the vectors are assumed to be JSON arrays whose positions line
# up with the *sorted* vocabulary -- confirm against whatever produces
# WeightVectors.json.gz.
with gzip.open("WeightVectors.json.gz", 'r') as source, \
        open("alternate_weights.csv", 'w') as alternate:
    fwriter = csv.writer(alternate, delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # The two header lines are copied verbatim into their own side files.
    buf = source.readline()
    with open("bookids2.json", 'w') as t1:
        t1.write(buf)
    BOOKS = sorted(json.loads(buf).keys())

    buf = source.readline()
    with open("vocabulary.json", 'w') as t2:
        t2.write(buf)
    VOCABULARY = sorted(json.loads(buf))
    fwriter.writerow(VOCABULARY)

    # readline() yields "" only at end of file, so that is the stop sentinel.
    for iterator, buf in enumerate(iter(source.readline, "")):
        # Single parenthesised argument: prints the same text as the former
        # `print "copying book: ", iterator` statement.
        print("copying book:  %d" % iterator)
        TFIDF = json.loads(buf)

        # Per-book term frequencies; Counter reports 0 for terms it lacks.
        # The handle is closed as soon as its single line has been read.
        with gzip.open("BOOK_TF/" + BOOKS[iterator] + ".json.gz", 'r') as tf_file:
            TF = Counter(json.loads(tf_file.readline()))

        # Walk the vocabulary once with its positions instead of calling
        # VOCABULARY.index() per absent term (quadratic in vocabulary size).
        for position, term in enumerate(VOCABULARY):
            if TF[term] == 0:
                TFIDF[position] = 0

        fwriter.writerow(TFIDF)

# NOTE(review): the script used to finish by opening "weightvectors.json" for
# writing and writing an undefined name (`qwerty`) to it. That could only
# truncate the file and then raise NameError, so the step was dropped. Restore
# it once it is known what should be written there.
